import requests
import re
class Spider:
	"""Small crawler for the "hot" listing pages of qiushibaike.com.

	``loadPage`` downloads one listing page and extracts the text snippets
	from it; ``printOnePage`` writes such a list to standard output.
	"""

	# Listing URL template; ``{}`` is replaced by the page number.
	_PAGE_URL = "https://www.qiushibaike.com/hot/page/{}/"
	# Browser-like User-Agent sent with every request.
	_USER_AGENT = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
	# Seconds to wait for the server; without a timeout requests.get() can
	# block indefinitely on a stalled connection.
	_TIMEOUT = 10
	# Captures the inner HTML of the <span> that directly follows a
	# <div class="content"> line. re.S lets "." span newlines, because the
	# captured text usually covers several lines.
	_ITEM_PATTERN = re.compile(r'<div class="content">\n<span>(.*?)</span>', re.S)

	def loadPage(self, page):
		"""Download one listing page and return the snippets found on it.

		@param page number of the listing page to request (converted with str())
		@returns list of strings, one per matched content block, in page
			order; empty when nothing matches
		@raises requests.exceptions.RequestException on connection failure
			or when the server does not answer within ``_TIMEOUT`` seconds
		"""
		url = self._PAGE_URL.format(str(page))
		headers = {"User-Agent": self._USER_AGENT}
		response = requests.get(url, headers=headers, timeout=self._TIMEOUT)
		return self._extractItems(response.text)

	def _extractItems(self, html):
		"""Return every snippet captured by ``_ITEM_PATTERN`` in ``html``."""
		return self._ITEM_PATTERN.findall(html)

	def printOnePage(self, item_list, page):
		"""Print a page banner followed by every snippet in the list.

		@param item_list snippets to print, e.g. the result of loadPage()
		@param page page number shown in the banner (formatted with %d)
		"""
		print("*********第%d页，爬取完毕...******" % page)
		for item in item_list:
			# Separator line so consecutive snippets are easy to tell apart.
			print("===============")
			print(item)

if __name__ == "__main__":
	# =====================
	#   Qiushibaike mini crawler
	# =====================
	# Script entry point: wait for the user to press Enter, then fetch and
	# print the first listing page.
	print("请按下回车开始")
	input()

	# Create the crawler, download page 1, then print what was extracted.
	mySpider = Spider()
	first_page_items = mySpider.loadPage(1)
	mySpider.printOnePage(first_page_items, 1)
